In the present analysis we study the interest of the AI community in a set of AI benchmarks. We focus on “interest” rather than “progress” because interest is something we can estimate using proxies. In this particular case, we use the (normalised) number of hits (i.e., number of documents) obtained from AItopics per benchmark over the last decade (2008-2019). Note that the results for 2019 are incomplete.

The benchmarks in the present analysis rely on our own previous analysis and annotation of papers, as well as on open resources such as Papers With Code, including data from several repositories (e.g., EFF, NLP-progress, SQuAD, RedditSota, etc.).

# Build the benchmark x cognitive-ability table weighted by mean interest.
#
# Arguments:
#   data      Data frame with one row per benchmark. The function reads a
#             `keyword` column (used as row names), one contiguous run of
#             `X<year>` columns (one per entry of `years`) and the
#             cognitive-ability columns listed in `cogAbs` below.
#   rangeMean Number of years before the last one to include in the mean,
#             i.e. the mean covers the last `rangeMean + 1` entries of `years`.
#   norm      If TRUE the mean interest is passed through `normalize()` before
#             being used as weight; if FALSE the raw mean is used.
#   years     Years covered by the `X<year>` columns, in column order.
#
# Returns an unnamed list of four elements:
#   [[1]] ability table multiplied row-wise by the weights,
#   [[2]] unweighted ability table,
#   [[3]] mean interest per benchmark (exact zeros replaced by 1e-13),
#   [[4]] the weights actually applied (normalised or raw).
prepareVis <- function(data, rangeMean = 5, norm = TRUE, years = 2008:2019) {

  # Kept from the original implementation: callers may rely on the RNG state
  # being reset here, although nothing below draws random numbers.
  set.seed(288)

  stopifnot(
    is.data.frame(data),
    "keyword" %in% colnames(data),
    length(years) > 0,
    is.numeric(rangeMean), length(rangeMean) == 1,
    rangeMean >= 0, rangeMean < length(years)
  )

  interest.df <- data

  # Rename the positional run X<first year> .. X<last year> to plain years.
  first.col <- paste0("X", years[1])
  last.col <- paste0("X", years[length(years)])
  ini <- which(colnames(interest.df) == first.col)
  fin <- which(colnames(interest.df) == last.col)
  if (length(ini) != 1 || length(fin) != 1 ||
      fin - ini + 1 != length(years)) {
    stop("`data` must contain one contiguous column per year, from ",
         first.col, " to ", last.col, call. = FALSE)
  }
  colnames(interest.df)[ini:fin] <- as.character(years)

  # Last `rangeMean + 1` years. `tail()` avoids the `1:0` trap of the previous
  # negative-index form, which silently dropped the first year whenever
  # rangeMean == length(years) - 1 while still dividing by rangeMean + 1.
  years.range <- tail(years, rangeMean + 1)

  interest.df[] <- lapply(interest.df, type.convert, as.is = TRUE)
  year.values <- interest.df[, as.character(years.range), drop = FALSE]
  interest.df$mean.Interest <- rowSums(year.values) / length(years.range)

  cogAbs <- c("MP", "SI", "VP", "AP", "AS", "PA", "CE", "CO", "EC", "NV",
              "CL", "QL", "MS", "MC")

  # Keep the ability columns plus the mean; warn (as before) rather than fail
  # when one of them is absent from `data`.
  wanted <- c(cogAbs, "mean.Interest")
  absent <- setdiff(wanted, colnames(interest.df))
  if (length(absent) > 0) {
    warning("Unknown columns: ", paste(absent, collapse = ", "),
            call. = FALSE)
  }
  keywords <- interest.df$keyword
  interest <- interest.df[, intersect(wanted, colnames(interest.df)),
                          drop = FALSE]
  rownames(interest) <- keywords

  # Hard-coded data patch inherited from the original ("check it!"). Guarded so
  # that a data set without this benchmark does not get a spurious extra row.
  if ("ILSVRC" %in% rownames(interest) && "QL" %in% colnames(interest)) {
    interest["ILSVRC", "QL"] <- 0 # <------------------- check it!
  }

  # Second conversion: a column that was non-numeric only because of the cell
  # patched above becomes numeric here.
  interest[] <- lapply(interest, type.convert, as.is = TRUE)

  # Avoid exact zeros in the weights (callers take log() of them).
  zero.mean <- which(interest$mean.Interest == 0)
  interest[zero.mean, "mean.Interest"] <- 0.0000000000001

  interest.mean <- interest$mean.Interest
  interest$mean.Interest <- NULL

  weights <- if (norm) {
    # NOTE(review): method = "scale" is passed together with `range`; confirm
    # against the `normalize()` documentation that this is the intended
    # normalisation (the commented-out predecessor was a min-max rescale).
    normalize(interest.mean + 0.000001, method = "scale", range = c(0, 1))
  } else {
    interest.mean
  }

  interest.pond <- interest * weights
  list(interest.pond, interest, interest.mean, weights)
}
# Draw the interactive bipartite network benchmarks <-> cognitive abilities.
#
# Arguments:
#   data       Weighted benchmark x ability table (rows = benchmarks,
#              columns = abilities), e.g. the first element returned by
#              prepareVis().
#   categories One category label per row of `data`, in row order.
#   norm       Unused; kept so existing calls keep working.
#
# Returns the visNetwork widget.
plotVis <- function(data, categories, norm = TRUE) {
  set.seed(288)

  # shapes =  c("square", "triangle", "box", "circle", "dot", "star",
  #             "ellipse", "database", "text", "diamond", "square", "triangle","box")
  # vis$nodes$shape <- c(shapes[as.numeric(as.factor(categories))], rep("#dot",14))

  # Palette indexed by the position of each category among the sorted factor
  # levels. The first entry was misspelled "blalck", which is not a colour.
  colours <- c("1" = "black", "2" = "#543005", "3" = "#8c510a",
               "4" = "#bf812d", "5" = "#dfc27d", "6" = "#f6e8c3",
               "7" = "#f5f5f5", "8" = "#c7eae5", "9" = "#80cdc1",
               "10" = "#35978f", "11" = "#01665e", "12" = "#003c30",
               "13" = "#FAFAFA")

  graph <- graph_from_incidence_matrix(data, directed = FALSE, weighted = TRUE)
  vis <- toVisNetworkData(graph)

  # Ability nodes are the columns of `data`; this was a hard-coded 14.
  # Assumes benchmark (row) nodes come first, ability (column) nodes last,
  # as the original indexing did.
  n.abilities <- ncol(data)
  n.benchmarks <- nrow(vis$nodes) - n.abilities
  categories <- as.character(categories)
  stopifnot(length(categories) == n.benchmarks)

  # Benchmarks get a fixed size; abilities are sized by their column total.
  vis$nodes$value <- c(rep(10, n.benchmarks), colSums(data) * 10000)
  vis$nodes$title <- vis$nodes$label
  vis$nodes$category <- c(categories, rep("CogAb", n.abilities))
  vis$nodes$group <- vis$nodes$category
  vis$nodes$color <- colours[as.numeric(as.factor(vis$nodes$category))]

  # vis$edges$value <- log(vis$edges$weight+1)
  # Edge width: log of the weights rescaled to [0, 1]; the small offsets keep
  # log() away from zero.
  scaled.weight <- normalize(vis$edges$weight + 0.00001, method = "range",
                             range = c(0, 1))
  vis$edges$value <- log(scaled.weight + 0.00001)

  # vis$edges$width <- vis$edges$weight

  visNetwork(vis$nodes, vis$edges, height = "1000px", width = "100%") %>%
    visEdges(arrows = "to",
             color = list(color = 'rgba(70,130,180,0.3)',
                          highlight = "#4682B4")) %>%
    visIgraphLayout(
      physics = FALSE,
      randomSeed = 2017,
      layout = "layout_with_fr"
    ) %>%
    visInteraction(navigationButtons = TRUE) %>%
    visOptions(selectedBy = "group", highlightNearest = TRUE)
}

Mean Interest per AI benchmark

In these two plots we show how the mean interest per AI benchmark has varied across different periods (i.e., last decade, last lustrum and last year).

There are no big changes…

Plot legend:

# Load the per-benchmark interest table (first sheet of the workbook).
interest.df <- read.xlsx2("interest_kw_processed_raw_slope.xlsx", sheetIndex = 1)

# Mean interest per benchmark over three windows. prepareVis() averages the
# last `rangeMean + 1` years, so the second argument is the window length
# minus one. `years` is not defined in this chunk and must already exist.
decade.vis <- prepareVis(interest.df, length(years) - 1, norm = FALSE)
df.interest <- data.frame(Benchmark = rownames(decade.vis[[1]]),
                          Last.Decade = decade.vis[[3]],
                          Last.Lustrum = prepareVis(interest.df, 5, norm = FALSE)[[3]],
                          Last.Year = prepareVis(interest.df, 1, norm = FALSE)[[3]])

# Long format: one row per (benchmark, period).
df.interest.m <- melt(df.interest, id.vars = "Benchmark")

# Raw scale. The benchmark axis title is blanked with xlab(""); previously a
# second ylab("") overrode the "Mean Interest" label set just before it.
a <- ggplot(df.interest.m, aes(reorder(Benchmark, value), value, colour = variable)) +
  geom_point(alpha = 1/3, size = 3.5) + xlab("") + ylab("Mean Interest") +
  coord_flip() + theme_minimal() + theme(legend.position = "bottom")

# Same plot on a log scale.
b <- ggplot(df.interest.m, aes(reorder(Benchmark, value), log(value), colour = variable)) +
  geom_point(alpha = 1/3, size = 3.5) + xlab("") + ylab("log(Mean Interest)") +
  coord_flip() + theme_minimal() + theme(legend.position = "bottom")

a
b

Mapping between AI benchmarks and Cognitive Abilities

Graphical representation

  • Benchmarks are grouped and coloured by area (groups from https://paperswithcode.com/)
  • Cognitive abilities are coloured in black and their size represents their relevance (total sum in the mapping) weighted by interest (previous plot).
  • Edges represent that an ability is assigned to a task.
  • The width of the edges represents “interest” in the benchmark: the wider the edge, the more interest from the community during the last decade (mean).

Note that we can perform exactly the same analysis focusing on different (ranges of) years: the graph stays the same, but the width of the edges may vary (a little bit).

(The networks are interactive!)

Last decade (2008-2019)

# Top-level area of each benchmark: the text before the first ">" in
# TaskHierarchies, with surrounding whitespace removed. Vectorised so that an
# empty table does not trip over `1:nrow(...)`.
hierarchy.parts <- str_split(as.character(interest.df$TaskHierarchies), pattern = ">")
interest.df$category <- str_trim(
  vapply(hierarchy.parts, function(parts) parts[1], character(1))
)

categories <- interest.df$category


# Whole period: average over every entry of `years` (defined outside this chunk).
interest.pond.All <- prepareVis(interest.df, length(years) - 1, norm = FALSE)
plotVis(interest.pond.All[[1]], categories)

Last lustrum (2014-2019)

Almost unnoticeable differences regarding the width of the edges (due to the size of the graph and the small variations in mean interest). We already saw this in the first two plots.

# Same network with weights averaged over the last six years
# (rangeMean = 5 means the last year plus the five before it).
interest.pond.5 <- prepareVis(interest.df, rangeMean = 5, norm = FALSE)
plotVis(data = interest.pond.5[[1]], categories = categories)

Relevance of the cognitive abilities in different periods

# barplot(colSums(interest.pond.All[[1]]), main = "Total sum (pondered by mean interest): Last Decade ") # interest.sumcols
# barplot(colSums(interest.pond.5[[1]]), main = "Total sum (pondered by mean interest): Last lustrum") # interest.sumcols


# One long table per period: benchmark, period label, ability, weighted value.
dataALL <- interest.pond.All[[1]]
dataALL$period <- "2008-2019"
dataALL$benchmark <- rownames(dataALL)
meltALL <- melt(dataALL, id.vars = c("benchmark", "period"))


data5 <- interest.pond.5[[1]]
data5$period <- "2014-2019"
data5$benchmark <- rownames(data5)
melt5 <- melt(data5, id.vars = c("benchmark", "period"))


# rangeMean = 3 averages the last four years, so the window is 2016-2019
# (it was labelled "2017-2019").
data3 <- prepareVis(interest.df, 3, norm = FALSE)[[1]]
data3$period <- "2016-2019"
data3$benchmark <- rownames(data3)
melt3 <- melt(data3, id.vars = c("benchmark", "period"))


# NOTE(review): rangeMean = 1 would cover 2018-2019, not 2019 alone.
# data1 <- prepareVis(interest.df, 1, norm = F)[[1]]
# data1$period <- "2019"
# data1$benchmark <- rownames(data1)
# melt1 <- melt(data1, id.vars = c("benchmark", "period"))

all <- rbind(meltALL, melt5, melt3)


# Mean weighted value per ability and period, averaged over benchmarks.
# Grouping by benchmark as well (as before) left a single value per group, so
# the mean was a no-op and the bars of all benchmarks were drawn on top of
# each other.
all.s <- ungroup(summarise(group_by(all, period, variable), mean = mean(value)))
ggplotly(ggplot(all.s, aes(variable, mean, fill = period)) +
           geom_bar(stat = "identity", position = "dodge") +
           xlab("") + ylab("Mean Interest") +
           scale_fill_brewer(palette = "Paired") + theme_minimal())

Interest per benchmark.

(Groups from https://paperswithcode.com/)

Computer Vision

# Call the per-category plotting helper for "Computer Vision"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Computer Vision")

Graphs

# Call the per-category plotting helper for "Graphs"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Graphs")

Natural Language Processing

# Call the per-category plotting helper for "Natural Language Processing"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Natural Language Processing")

Playing Games

# Call the per-category plotting helper for "Playing Games"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Playing Games")

Miscellaneous

# Call the per-category plotting helper for "Miscellaneous"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Miscellaneous")

Medical

# Call the per-category plotting helper for "Medical"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Medical")

Methodology

# Call the per-category plotting helper for "Methodology"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Methodology")

Speech

# Call the per-category plotting helper for "Speech"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Speech")

Reasoning

# Call the per-category plotting helper for "Reasoning"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Reasoning")

Time Series

# Call the per-category plotting helper for "Time Series"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Time Series")

Computer Code

# Call the per-category plotting helper for "Computer Code"
# (plotIterest.Cat and interest.m are defined outside this section).
plotIterest.Cat(interest.m, "Computer Code")